{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "https://zhuanlan.zhihu.com/p/112362700"
      ],
      "metadata": {
        "id": "QQ1IkT8wBBj-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import re\n",
        "import random\n",
        "import jieba\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer\n",
        "import matplotlib.pyplot as plt\n",
        "from sklearn.decomposition import PCA\n",
        "from sklearn.cluster import KMeans\n",
        "import gensim\n",
        "from gensim.models import Word2Vec\n",
        "from sklearn.preprocessing import scale\n",
        "import multiprocessing"
      ],
      "metadata": {
        "id": "kkbIrPUK0LQf"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "corpus = [\n",
        "    '花呗更改绑定银行卡',\n",
        "    '我什么时候开通了花呗',\n",
        "    'A man is eating food.',\n",
        "    'A man is eating a piece of bread.',\n",
        "    'The girl is carrying a baby.',\n",
        "    'A man is riding a horse.',\n",
        "    'A woman is playing violin.',\n",
        "    'Two men pushed carts through the woods.',\n",
        "    'A man is riding a white horse on an enclosed ground.'\n",
        "]"
      ],
      "metadata": {
        "id": "dwKdhQT4l6Y2"
      },
      "execution_count": 36,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "jieba.add_word(\"花呗\")"
      ],
      "metadata": {
        "id": "qZsUKvqCnAsD"
      },
      "execution_count": 26,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def preprocess_text(content_lines, sentences):\n",
        "    for line in content_lines:\n",
        "        try:\n",
        "            segs=jieba.lcut(line)\n",
        "            segs = [v for v in segs if not str(v).isdigit()]#去数字\n",
        "            segs = list(filter(lambda x:x.strip(), segs))   #去左右空格\n",
        "            segs = list(filter(lambda x:len(x)>1, segs)) #长度为1的字符\n",
        "            # segs = list(filter(lambda x:x not in stopwords, segs)) #去掉停用词\n",
        "            sentences.append(\" \".join(segs))\n",
        "        except Exception:\n",
        "            print(line)\n",
        "            continue"
      ],
      "metadata": {
        "id": "wl08tWyD0N3v"
      },
      "execution_count": 27,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "sentences = []\n",
        "# 处理语料，语料的处理结果存放在sentences\n",
        "preprocess_text(corpus, sentences)"
      ],
      "metadata": {
        "id": "jpmgGXHf0wwg"
      },
      "execution_count": 37,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "sentences"
      ],
      "metadata": {
        "id": "Fp6WsY-czt2r",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "aa8e2799-0819-4629-d1e6-0d074297cbc1"
      },
      "execution_count": 38,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['花呗 更改 绑定 银行卡',\n",
              " '什么 时候 开通 花呗',\n",
              " 'man is eating food',\n",
              " 'man is eating piece of bread',\n",
              " 'The girl is carrying baby',\n",
              " 'man is riding horse',\n",
              " 'woman is playing violin',\n",
              " 'Two men pushed carts through the woods',\n",
              " 'man is riding white horse on an enclosed ground']"
            ]
          },
          "metadata": {},
          "execution_count": 38
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5)\n",
        "#统计每个词语的tf-idf权值\n",
        "transformer = TfidfTransformer()\n",
        "# 第一个fit_transform是计算tf-idf 第二个fit_transform是将文本转为词频矩阵\n",
        "tfidf = transformer.fit_transform(vectorizer.fit_transform(sentences))\n",
        "# 获取词袋模型中的所有词语\n",
        "word = vectorizer.get_feature_names_out()\n",
        "# 将tf-idf矩阵抽取出来，元素w[i][j]表示j词在i类文本中的tf-idf权重\n",
        "weight = tfidf.toarray()\n",
        "#查看特征大小\n",
        "print ('Features length: ' + str(len(word)))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Da0-99pm1-18",
        "outputId": "9fff2717-64f6-4197-b515-58de58ccf933"
      },
      "execution_count": 39,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Features length: 33\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "weight.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3iL5ffnhnO1A",
        "outputId": "4f6064e5-0db8-4222-c880-d8a7d36abb12"
      },
      "execution_count": 47,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(9, 33)"
            ]
          },
          "metadata": {},
          "execution_count": 47
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.decomposition import PCA\n",
        "\n",
        "class JieKmeans:\n",
        "    def __init__(self, numClass=4, n_components=10, func_type='PCA'):\n",
        "        #这里也可以选择随机初始化init=\"random\"\n",
        "\n",
        "        self.PCA = PCA(n_components=n_components)\n",
        "        if func_type == 'PCA':\n",
        "            self.func_plot = PCA(n_components=2)\n",
        "        elif func_type == 'TSNE':\n",
        "            from sklearn.manifold import TSNE\n",
        "            self.func_plot = TSNE(2)\n",
        "\n",
        "        self.numClass = numClass\n",
        "\n",
        "    def plot_cluster(self, result, newData):\n",
        "\n",
        "        plt.figure(2)\n",
        "        Lab = [[] for i in range(self.numClass)]\n",
        "        index = 0\n",
        "        for labi in result:\n",
        "            Lab[labi].append(index)\n",
        "            index += 1\n",
        "        color = ['oy', 'ob', 'og', 'cs', 'ms', 'bs', 'ks', 'ys', 'yv', 'mv', 'bv', 'kv', 'gv', 'y^', 'm^', 'b^', 'k^',\n",
        "                    'g^'] * 3\n",
        "\n",
        "        for i in range(self.numClass):\n",
        "            x1 = []\n",
        "            y1 = []\n",
        "            for ind1 in newData[Lab[i]]:\n",
        "                # print ind1\n",
        "                try:\n",
        "                    y1.append(ind1[1])\n",
        "                    x1.append(ind1[0])\n",
        "                except:\n",
        "                    pass\n",
        "            plt.plot(x1, y1, color[i])\n",
        "\n",
        "        #绘制初始中心点\n",
        "        x1 = []\n",
        "        y1 = []\n",
        "        for ind1 in self.model.cluster_centers_:\n",
        "            try:\n",
        "                y1.append(ind1[1])\n",
        "                x1.append(ind1[0])\n",
        "            except:\n",
        "                pass\n",
        "        plt.plot(x1, y1, \"rv\") #绘制中心\n",
        "        plt.show()\n",
        "\n",
        "\n",
        "    def train(self, data):\n",
        "        tmp = self.PCA.fit_transform(data)\n",
        "        self.model = KMeans(\n",
        "            n_clusters=self.numClass,\n",
        "            max_iter=10000, init=\"k-means++\", tol=1e-6)\n",
        "        s = self.model.fit(tmp)\n",
        "        print(\"聚类算法训练完成\\n\", s)\n",
        "\n",
        "\n",
        "\n",
        "    def predict(self, data):\n",
        "        t_data = self.PCA.fit_transform(data)\n",
        "        result = list(self.model.predict(t_data))\n",
        "        return result\n",
        "\n",
        "\n",
        "    def plot(self, weight):\n",
        "        t_data = self.PCA.fit_transform(weight)\n",
        "        result = list(self.model.predict(t_data))\n",
        "        plot_pos = self.func_plot.fit_transform(weight)\n",
        "        self.plot_cluster(result, plot_pos)"
      ],
      "metadata": {
        "id": "WFmlOL3p4jyp"
      },
      "execution_count": 48,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "net = JieKmeans(\n",
        "    numClass=3,\n",
        "    n_components=5,\n",
        "    func_type='PCA'\n",
        "    )\n",
        "\n",
        "net.train(weight)\n",
        "# net.plot(weight)"
      ],
      "metadata": {
        "id": "95TzdOza-Zvy",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1499c4bd-6253-4cd5-b73a-235c48ec7aa5"
      },
      "execution_count": 49,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "聚类算法训练完成\n",
            " KMeans(max_iter=10000, n_clusters=3, tol=1e-06)\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning\n",
            "  warnings.warn(\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "net.plot(weight)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 451
        },
        "id": "25fXUyWIFqLF",
        "outputId": "a4912627-3a88-4238-db7e-a50c89378dd9"
      },
      "execution_count": 50,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<Figure size 640x480 with 1 Axes>"
            ],
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAGdCAYAAADaPpOnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAlLklEQVR4nO3df3BU1f3/8dfuwm7qyCYwIT9IFimggj/phx8xthnbugWUWhxkSpUKMlQ7FSgaHYXWgooliGhDBWWkOrYz+kFlwOFraRwbYIo2goLMUEA6IJTwY0MoZTdCJ4Hd8/0jnwQWkpCF3N09yfMxcyeTc8/dfeeQyb44995zXcYYIwAAAEu4U10AAABAIggvAADAKoQXAABgFcILAACwCuEFAABYhfACAACsQngBAABWIbwAAACrdEt1AR0tFovp8OHD6tGjh1wuV6rLAQAA7WCMUV1dnfr06SO3u+25lU4XXg4fPqxAIJDqMgAAwCWorq5WYWFhm306XXjp0aOHpMYf3u/3p7gaAADQHpFIRIFAoPlzvC2dLrw0nSry+/2EFwAALNOeSz64YBcAAFiF8AIAAKxCeAEAAFYhvAAAAKsQXgAAgFUILwAAwCqEFwAAYBXCCwAAsEqnW6QOAKKxqDYe2KgjdUeU3yNfJX1L5HF7Ul0WgA5CeAHQqazatUozK2bqYORgc1uhv1CLRy/WuMHjUlgZgI7CaSMAncaqXas0/t3xccFFkg5FDmn8u+O1ateqFFUGoCMRXgB0CtFYVDMrZsrIXLCvqe2RikcUjUWTXRqADkZ4AdApbDyw8YIZl3MZGVVHqrXxwMYkVgXACYQXAJ3CkbojHdoPQPoivADoFPJ75HdoPwDpi7uNAHQKJX1LVOgv1KHIoRave3HJpUJ/oUr6lsTvqK6Wamtbf+GcHKmwsIOrBXA5CC8AOgWP26PFoxdr/Lvj5ZIrLsC45JIklY8uj1/vpb5eGj5cqqlp/YXz8qT9+yWfz6HKASSK00YAOo1xg8dp5Y9XqsBfENde6C/Uyh+vvHCdF69X6ttXcrfyp9DtlgKBxn4A0obLGHPh/KrFIpGIMjMzFQ6H5ff7U10OgBRIaIXdDz+URo9u/cUqKqRRo5wpFECzRD6/OW0EoNPxuD36br/vtq/zyJGNp462bpWi56wB4/FI//M/jfsBpBVOGwHo2lwuad68+OAiNX4/b17jfgBphZkXADh/9qWdsy48ABJIDcILADTNvjRd+9KOWRceAAmkDqeNAEA6O/siNX5tY9aFB0ACqUV4AQCpcZZl/nxp8ODGr63MuvAASCD1CC8A0CQYlHbubPzaCh4ACaQe4QUAEsADIIHUI7wAQAJ4ACSQeoQXAEhA0wMgm56XdD6XXAr4Axc+ABJAhyG8AEACmh4AKemCANPqAyABdCjCCwAkKOEHQALoUI6Hl6VLl6pfv37KyMhQUVGRNm/e3Gb/EydOaNq0acrPz5fP59M111yjtWvXOl0mACRk3OBx2j9zv9ZPXq+3x72t9ZPXa9/MfQQXIAkcXWH3nXfeUWlpqZYtW6aioiKVl5dr1KhR2r17t3Jyci7o39DQoB/84AfKycnRypUrVVBQoH/961/KyspyskwAuCQJPQASQIdxGWMuXGmpgxQVFWn48OFasmSJJCkWiykQCGjGjBmaNWvWBf2XLVumF154QV9++aW6d+9+Se+ZyCO1AQBAekjk89ux00YNDQ3asmWLgucs9uR2uxUMBlVVVdXiMWvWrFFxcbGmTZum3Nxc3XDDDZo/f76i5z/t9Rz19fWKRCJxGwAA6LwcCy/Hjh1TNBpVbm5uXHtubq5CoVCLx3z11VdauXKlotGo1q5dq9/85jd68cUX9dxzz7X6PmVlZcrMzGzeAoFAh/4cAAAgvaTV3UaxWEw5OTl67bXXNHToUE2YMEG//vWvtWzZslaPmT17tsLhcPNWXV2dxIoBAECyOXbBbnZ2tjwej2pqauLaa2pqlJeX1+Ix+fn56t69uzyes+sjDB48WKFQSA0NDfJ6vRcc4/P55PP5OrZ4AACQthybefF6vRo6dKgqKyub22KxmCorK1VcXNziMd/+9re1Z88exWKx5rZ//vOfys/PbzG4AACArsfR00alpaVavny5/vjHP2rXrl36xS9+oZMnT2rKlCmSpEmTJmn27NnN/X/xi1/o+PHjmjlzpv75z3/qz3/+s+bPn69p06Y5WSYAALCIo+u8TJgwQbW1tZozZ45CoZCGDBmiioqK5ot4Dxw4ILf7bH4KBAL68MMP9eijj+qmm25SQUGBZs6cqSeffNLJMgEAgEUcXeclFVjnBQAA+6TFOi8AAABOILwAAACrEF4AAIBVCC8AAMAqhBcAAGAVwgsAALAK4QUAAFiF8AIAAKxCeAEAAFYhvAAAAKsQXgAAgFUILwAAwCqEFwAAYBXCCwAAsArhBQAAWIXwAgAArEJ4AQAAViG8AAAAqxBeAACAVQgvAADAKoQXAABgFcILAACwCuEFAABYhfACAACsQngBAABWIbwAAACrEF4AAIBVCC8AAMAqhBcAAGAVwgsAALAK4QUAAFiF8AIAAKxCeAEAAFYhvAAAAKsQXgAAgFUILwAAwCqEFwAAYBXCCwAAsArhBQAAWIXwAgAArEJ4AQAAViG8AAAAqxBeAACAVQgvAADAKoQXAABgFcILAACwCuEFAABYhfACAACsQngBAABWIbwAAACrEF4AAIBVkhJeli5dqn79+ikjI0NFRUXavHlzu45bsWKFXC6X7r77bmcLBAAA1nA8vLzzzjsqLS3V3LlztXXrVt18880aNWqUjh492uZx+/fv1+OPP66SkhKnSwQAABZxPLy89NJLevDBBzVlyhRdd911WrZsma644gq98cYbrR4TjUY1ceJEPfPMM+rfv7/TJQIAAIs4Gl4aGhq0ZcsWBYPBs2/odisYDKqqqqrV45599lnl5ORo6tSpF32P+vp6RSKRuA0AAHRejoaXY8eOKRqNKjc3N649NzdXoVCoxWM+/vhjvf7661q+fHm73qOsrEyZmZnNWyAQuOy6AQBA+kqru43q6up0//33a/ny5crOzm7XMbNnz1Y4HG7eqqurHa4SAACkUjcnXzw7O1sej0c1NTVx7TU1NcrLy7ug/969e7V//37dddddzW2xWKyx0G7dtHv3bg0YMCDuGJ/PJ5/P50D1AAAgHTk68+L1ejV06FBVVlY2t8ViMVVWVqq4uPiC/oMGDdL27du1bdu25u1HP/qRvve972nbtm2cEgIAAM7OvEhSaWmpJk+erGHDhmnEiBEqLy/XyZMnNWXKFEnSpEmTVFBQoLKyMmVkZOiGG26IOz4rK0uSLmgHAABdk+PhZcKECaqtrdWcOXMUCoU0ZMgQVVRUNF/Ee+DAAbndaXXpDQAASGMuY4xJdREdKRKJKDMzU+FwWH6/P9XlAACAdkjk85spDwAAYBXCCwAAsArhBQAAWIXwAgAArEJ4AQAAViG8AAAAqxBeAACAVQgvAADAKoQXAABgFcILAACwCuEFAABYhfACAACsQngBAABWIbwAAACrEF4AAIBVCC8AAMAqhBcAAGAVwgsAALAK4QUAAFiF8AIAAKxCeAEAAFYhvAAAAKsQXgAAgFUILwAAwCqEFwAAYBXCCwAAsArhBQAAWIXwAgAArEJ4AQAAViG8AAAAqxBeAACAVQgvAADAKoQXAABgFcILAACwCuEFAABYhfACAACsQngBAABWIbwAAACrEF4AAIBVCC8AAMAqhBcAAGAVwgsAALAK4QUAAFiF8AIAAKxCeAEAAFYhvAAAAKsQXgAAgFUILwAAwCqEFwAAYBXCCwAAsArhBQAAWCUp4WXp0qXq16+fMjIyVFRUpM2bN7fad/ny5SopKVHPnj3Vs2dPBYPBNvsDAICuxfHw8s4776i0tFRz587V1q1bdfPNN2vUqFE6evRoi/03bNige++9V+vXr1dVVZUCgYBGjhypQ4cOOV0qAACwgMsYY5x8g6KiIg0fPlxLliyRJMViMQUCAc2YMUOzZs266PHRaFQ9e/bUkiVLNGnSpIv2j0QiyszMVDgclt/vv+z6AQCA8xL5/HZ05qWhoUFbtmxRMBg8+4Zut4LBoKqqqtr1GqdOndLp06fVq1evFvfX19crEonEbQAAoPNyNLwcO3ZM0WhUubm5ce25ubkKhULteo0nn3xSffr0iQtA5yorK1NmZmbzFggELrtuAACQvtL6bqMFCxZoxYoVWr16tTIyMlrsM3v2bIXD4eaturo6yVUCAIBk6ubki2dnZ8vj8aimpiauvaamRnl5eW0eu2jRIi1YsEB//etfddNNN7Xaz+fzyefzdUi9AAAg/Tk68+L1ejV06FBVVlY2t8ViMVVWVqq4uLjV4xYuXKh58+apoqJCw4YNc7JEAABgGUdnXiSptLRUkydP1rBhwzRixAiVl5fr5MmTmjJliiRp0qRJKigoUFlZmSTp+eef15w5c/T222+rX79+zdfGXHnllbryyiudLhcAAKQ5x8PLhAkTVFtbqzlz5igUCmnIkCGqqKhovoj3wIEDcrvPTgC9+uqramho0Pjx4+NeZ+7cuXr66aedLhcAAKQ5x9d5STbWeQEAwD5ps84LAABARyO8AAAAqxBeAACAVQgvAADAKoQXAABgFcILAACwCuEFAABYhfACAACsQngBAABWIbwAAACrEF4AAIBVCC8AAMAqhBcAAGAVwgsAALAK4QUAAFiF8AIAAKxCeAEAAFYhvAAAAKsQXgAAgFUILwAAwCqEFwAAYBXCCwAAsArhBQAAWIXwAgAArEJ4AQAAViG8AAAAqxBeAACAVQgvAADAKoQXAABgFcILAACwCuEFAABYhfACAACsQngBAABWIbwAAACrEF4AAIBVCC8AAMAqhBcAAGAVwgsAALAK4QUAAFiF8AIAAKxCeAEAAFYhvAAAAKsQXgAAgFUILwAAwCqEFwAAYBXCCwAAsArhBQAAWIXwAgAArEJ4AQAAViG8AAAAqxBeAACAVZISXpYuXap+/fopIyNDRUVF2rx5c5v933vvPQ0aNEgZGRm68cYbtXbt2mSUCQAALOB4eHnnnXdUWlqquXPnauvWrbr55ps1atQoHT16tMX+f//733Xvvfdq6tSp+uKLL3T33Xfr7rvv1j/+8Q+nSwUAABZwGWOMk29QVFSk4cOHa8mSJZKkWCymQCCgGTNmaNasWRf0nzBhgk6ePKkPPvigue2WW27RkCFDtGzZsou+XyQSUWZmpsLhsPx+f8f9IAAAwDGJfH47OvPS0NCgLVu2KBgMnn1Dt1vBYFBVVVUtHlNVVRXXX5JGjRrVav/6+npFIpG4DQAAdF6Ohpdjx44pGo0qNzc3rj03N1ehUKjFY0KhUEL9y8rKlJmZ2bwFAoGOKR4AAKQl6+82mj17tsLhcPNWXV2d6pIAAICDujn54tnZ2fJ4PKqpqYlrr6mpUV5eXovH5OXlJdTf5/PJ5/N1TMEAACDtOTrz4vV6NXToUFVWVja3xWIxVVZWqri4uMVjiouL4/pL0kcffdRqfwAA0LU4OvMiSaWlpZo8ebKGDRumESNGqLy8XCdPntSUKVMkSZMmTVJBQYHKysokSTNnztRtt92mF198UWPGjNGKFSv0+eef67XXXnO6VAAAYAHHw8uECRNUW1urOXPmKBQKaciQIaqoqGi+KPfAgQNyu89OAN166616++239dRTT+lXv/qVrr76ar3//vu64YYbnC4VAABYwPF1XpKNdV4AALBPIp/fjs+8oHOKRqWNG6UjR6T8fKmkRPJ4Ul0VAKArILwgYatWSTNnSgcPnm0rLJQWL5bGjUtdXQCArsH6dV6QXKtWSePHxwcXSTp0qLF91arU1AUA6DoIL2i3aLRxxqWlq6Sa2h55pLEfAABOIbyg3TZuvHDG5VzGSNXVjf0AAHAK4QXtduRIx/YDAOBSEF7Qbvn5HdsPAIBLQXhBu5WUNN5V5HK1vN/lkgKBxn4AADiF8IJ283gab4eWLgwwTd+Xl7PeCwDAWYQXJGTcOGnlSqmgIL69sLCxnXVeAABOY5E6JGzcOGnsWFbYBQCkBuEFl8Tjkb773VRXAQDoijhtBAAArEJ4AQAAViG8AAAAqxBeAACAVQgvAADAKtxtlGYaGqRXXpH27pUGDJAefljyelNdFQAA6YPwkkaeeEJ66SUpGj3b9vjjUmmptHBh6uoCACCdEF7SxBNPSC+8cGF7NHq2nQADAIDkMsaYVBfRkSKRiDIzMxUOh+X3+1NdTrs0NEhXXBE/43I+j0c6dYpTSACAzimRz28u2E0Dr7zSdnCRGve/8kpy6gEAIJ0RXtLA3r0d2w8AgM6M8JIGBgzo2H4AAHRmXLCbatXVmlZcq7fcUjR24e6jytEhFcrjabxtGgCAro7wkkr19dLw4epeU6PPWulyRHnqp/2aWerjYl0AAMRpo9TyeqW+fSV3y/8MUbl1UAHNfNzLbdIAAPwfwksquVzSvHlSrIXzRZI8imnI/5unhS+4klwYAADpi/CSaiNHSsOHNy7kci6Pp/GU0piRqakLAIA0RXhJtabZl/MXeolGG9tdzLoAAHAuwks6OH/25f9mXTSSWRcAAM5HeEkH58++MOsCAECrCC/pomn2RWLWBQCANhBe0oXLJc2fLw0e3PiVWRcAAFrEInXpJBiUdu5MdRUAAKQ1Zl4AAIBVCC8AAMAqhBcAAGAVwgsAALAK4QUAAFiF8AIAAKxCeAEAAFYhvAAAAKsQXgAAgFVYYbedGhqkV16R9u6VBgyQHn5Y8npTXRUAAF0P4aUdnnhCeumlsw99lqTHH5dKS6WFC1NXFwAAXRHh5SKeeEJ64YUL26PRs+0EGAAAksdljDGpLqIjRSIRZWZmKhwOy+/3X9ZrNTRIV1wRP+NyPo9HOnWKU0gAAFyORD6/uWC3Da+80nZwkRr3v/JKcuoBAACElzbt3dux/QAAwOUjvLRhwICO7QcAAC6fY+Hl+PHjmjhxovx+v7KysjR16lR9/fXXbfafMWOGrr32Wn3jG99Q37599ctf/lLhcNipEi/q4Ycbr2lpi8fT2A8AACSHY+Fl4sSJ2rFjhz766CN98MEH+tvf/qaHHnqo1f6HDx/W4cOHtWjRIv3jH//Qm2++qYqKCk2dOtWpEi/K6228HbotpaVcrAsAQDI5crfRrl27dN111+mzzz7TsGHDJEkVFRW68847dfDgQfXp06ddr/Pee+/ppz/9qU6ePKlu3dp3V3dH3m3UpKV1Xjwe1nkBAKCjpPxuo6qqKmVlZTUHF0kKBoNyu93atGlTu1+n6QdoK7jU19crEonEbR1t4cLG26F/9ztp+vTGr6dOEVwAAEgFRxapC4VCysnJiX+jbt3Uq1cvhUKhdr3GsWPHNG/evDZPNUlSWVmZnnnmmUuutb28XumRRxx/GwAAcBEJzbzMmjVLLperze3LL7+87KIikYjGjBmj6667Tk8//XSbfWfPnq1wONy8VVdXX/b7AwCA9JXQzMtjjz2mBx54oM0+/fv3V15eno4ePRrXfubMGR0/flx5eXltHl9XV6fRo0erR48eWr16tbp3795mf5/PJ5/P1676AQCA/RIKL71791bv3r0v2q+4uFgnTpzQli1bNHToUEnSunXrFIvFVFRU1OpxkUhEo0aNks/n05o1a5SRkZFIeQAAoAtw5ILdwYMHa/To0XrwwQe1efNmffLJJ5o+fbp+8pOfNN9pdOjQIQ0aNEibN2+W1BhcRo4cqZMnT+r1119XJBJRKBRSKBRS9GJr9AMAgC7DsadKv/XWW5o+fbpuv/12ud1u3XPPPfr973/fvP/06dPavXu3Tp06JUnaunVr851IAwcOjHutffv2qV+/fk6VCgAALMJTpQEAQMqlfJ0XAAAApxBeAACAVQgvAADAKoQXAABgFcILAACwCuEFAABYhfACAACsQngBAABWIbwAAACrEF4AAIBVCC8AAMAqhBcAAGAVwgsAALAK4QUAAFiF8AIAAKxCeAEAAFYhvAAAAKt0S3UBAADADsZEdeLERjU0HJHXm6+srBK5XJ6k10F4AQAAF1Vbu0p79sxUff3B5jafr1ADBy5W797jkloLp40AAECbamtXaceO8XHBRZLq6w9px47xqq1dldR6CC8AAKBVxkS1Z89MSaalvZKkPXsekTHRpNVEeAEAAK06cWLjBTMu8Yzq66t14sTGpNVEeAEAAK1qaDjSof06AuEFAAC0yuvN79B+HYHwAgAAWpWVVSKfr1CSq5UeLvl8AWVllSStJsILAABolcvl0cCBi5u+O3+vJGngwPKkrvdCeAEAAG3q3Xucrr9+pXy+grh2n69Q11+/MunrvLBIHQAAuKjevccpO3ssK+wCAAB7uFwe9ez53VSXwWkjAABgF8ILAACwCuEFAABYhfACAACsQngBAABWIbwAAACrEF4AAIBVWOcFAABI1dVSbW3r+3NypMLC5NXTBsILAABdXX29NHy4VFPTep+8PGn/fsnnS1pZreG0EQAAXZ3XK/XtK7lbiQVutxQINPZLA4QXAAC6OpdLmjdPisVa3h+LNe53nf9U6dQgvAAAAGnkyMZTR57zHrTo8TS2jxyZmrpaQHgBAABnZ1+i0fj2aDStZl0kwgsAAGhy/uxLGs66SIQXAADQ5PzZlzScdZEILwAA4FxNsy9SWs66SIQXAABwLpdLmj9fGjy48WuazbpILFIHAADOFwxKO3emuopWMfMCAACsQngBAABWIbwAAACrEF4AAIBVHAsvx48f18SJE+X3+5WVlaWpU6fq66+/btexxhjdcccdcrlcev/9950qEQAAWMix8DJx4kTt2LFDH330kT744AP97W9/00MPPdSuY8vLy+VKw1uzAABA6jlyq/SuXbtUUVGhzz77TMOGDZMkvfzyy7rzzju1aNEi9enTp9Vjt23bphdffFGff/658vPznSgPAABYzJGZl6qqKmVlZTUHF0kKBoNyu93atGlTq8edOnVK9913n5YuXaq8vLx2vVd9fb0ikUjcBgAA2mZMVP/5zwbV1Pyv/vOfDTImevGD0oQjMy+hUEg5OTnxb9Stm3r16qVQKNTqcY8++qhuvfVWjR07tt3vVVZWpmeeeeaSawUAoKuprV2lPXtmqr7+YHObz1eogQMXq3fvcSmsrH0SmnmZNWuWXC5Xm9uXX355SYWsWbNG69atU3l5eULHzZ49W+FwuHmrrq6+pPcHAKArqK1dpR07xscFF0mqrz+kHTvGq7Z2VYoqa7+EZl4ee+wxPfDAA2326d+/v/Ly8nT06NG49jNnzuj48eOtng5at26d9u7dq6ysrLj2e+65RyUlJdqwYUOLx/l8Pvl8vvb+CAAAdFnGRLVnz0xJpqW9klzas+cRZWePlcvlSXJ17ZdQeOndu7d69+590X7FxcU6ceKEtmzZoqFDh0pqDCexWExFRUUtHjNr1iz97Gc/i2u78cYb9bvf/U533XVXImUCAIAWnDix8YIZl3hG9fXVOnFio3r2/G6SqkqcI9e8DB48WKNHj9aDDz6oZcuW6fTp05o+fbp+8pOfNN9pdOjQId1+++3605/+pBEjRigvL6/FWZm+ffvqm9/8phNlAgDQpTQ0HOnQfqni2Dovb731lgYNGqTbb79dd955p77zne/otddea95/+vRp7d69W6dOnXKqBAAAcA6vt31LkLS3X6q4jDEtnfiyViQSUWZmpsLhsPx+f6rLAQAgbRgT1aef9lN9/SG1fN2LSz5foW65ZV/Sr3lJ5PObZxsBANBFuFweDRy4uOm78/dKkgYOLE/ri3UlwgsAAF1K797jdP31K+XzFcS1+3yFuv76lVas8+LIBbsAACB99e49TtnZY3XixEY1NByR15uvrKyStJ9xaUJ4AQCgC3K5PGl9O3RbOG0EAACsQngBAABWIbwAAACrEF4AAIBVCC8AAMAqhBcAAGAVwgsAALAK4QUAAFiF8AIAAKzS6VbYbXpIdiQSSXElAACgvZo+t5s+x9vS6cJLXV2dJCkQCKS4EgAAkKi6ujplZma22cdl2hNxLBKLxXT48GH16NFDLtf5j/uOF4lEFAgEVF1dLb/fn6QKuxbGODkY5+RgnJODcU6OdBtnY4zq6urUp08fud1tX9XS6WZe3G63CgsLEzrG7/enxT9cZ8YYJwfjnByMc3IwzsmRTuN8sRmXJlywCwAArEJ4AQAAVunS4cXn82nu3Lny+XypLqXTYoyTg3FODsY5ORjn5LB5nDvdBbsAAKBz69IzLwAAwD6EFwAAYBXCCwAAsArhBQAAWKVLhZfjx49r4sSJ8vv9ysrK0tSpU/X111+361hjjO644w65XC69//77zhZquUTH+fjx45oxY4auvfZafeMb31Dfvn31y1/+UuFwOIlVp7+lS5eqX79+ysjIUFFRkTZv3txm//fee0+DBg1SRkaGbrzxRq1duzZJldotkXFevny5SkpK1LNnT/Xs2VPBYPCi/y5olOjvc5MVK1bI5XLp7rvvdrbATiDRMT5x4oSmTZum/Px8+Xw+XXPNNen7d8N0IaNHjzY333yz+fTTT83GjRvNwIEDzb333tuuY1966SVzxx13GElm9erVzhZquUTHefv27WbcuHFmzZo1Zs+ePaaystJcffXV5p577kli1eltxYoVxuv1mjfeeMPs2LHDPPjggyYrK8vU1NS02P+TTz4xHo/HLFy40OzcudM89dRTpnv37mb79u1JrtwuiY7zfffdZ5YuXWq++OILs2vXLvPAAw+YzMxMc/DgwSRXbpdEx7nJvn37TEFBgSkpKTFjx45NTrGWSnSM6+vrzbBhw8ydd95pPv74Y7Nv3z6zYcMGs23btiRX3j5dJrzs3LnTSDKfffZZc9tf/vIX43K5zKFDh9o89osvvjAFBQXmyJEjhJeLuJxxPte7775rvF6vOX36tBNlWmfEiBFm2rRpzd9Ho1HTp08fU1ZW1mL/H//4x2bMmDFxbUVFRebnP/+5o3XaLtFxPt+ZM2dMjx49zB//+EenSuwULmWcz5w5Y2699Vbzhz/8wUyePJnwchGJjvGrr75q+vfvbxoaGpJV4mXpMqeNqqqqlJWVpWHDhjW3BYNBud1ubdq0qdXjTp06pfvuu09Lly5VXl5eMkq12qWO8/nC4bD8fr+6det0j99KWENDg7Zs2aJgMNjc5na7FQwGVVVV1eIxVVVVcf0ladSoUa32x6WN8/lOnTql06dPq1evXk6Vab1LHednn31WOTk5mjp1ajLKtNqljPGaNWtUXFysadOmKTc3VzfccIPmz5+vaDSarLIT0mU+GUKhkHJycuLaunXrpl69eikUCrV63KOPPqpbb71VY8eOdbrETuFSx/lcx44d07x58/TQQw85UaJ1jh07pmg0qtzc3Lj23Nxcffnlly0eEwqFWuzf3n+DruhSxvl8Tz75pPr06XNBcMRZlzLOH3/8sV5//XVt27YtCRXa71LG+KuvvtK6des0ceJErV27Vnv27NHDDz+s06dPa+7cuckoOyHWz7zMmjVLLperza29f3jOt2bNGq1bt07l5eUdW7SFnBznc0UiEY0ZM0bXXXednn766csvHEiSBQsWaMWKFVq9erUyMjJSXU6nUVdXp/vvv1/Lly9XdnZ2qsvptGKxmHJycvTaa69p6NChmjBhgn79619r2bJlqS6tRdbPvDz22GN64IEH2uzTv39/5eXl6ejRo3HtZ86c0fHjx1s9HbRu3Trt3btXWVlZce333HOPSkpKtGHDhsuo3C5OjnOTuro6jR49Wj169NDq1avVvXv3yy27U8jOzpbH41FNTU1ce01NTatjmpeXl1B/XNo4N1m0aJEWLFigv/71r7rpppucLNN6iY7z3r17tX//ft11113NbbFYTFLjrO7u3bs1YMAAZ4u2zKX8Lufn56t79+7yeDzNbYMHD1YoFFJDQ4O8Xq+jNScs1RfdJEvThaSff/55c9uHH37Y5oWkR44cMdu3b4/bJJnFixebr776KlmlW+VSxtkYY8LhsLnlllvMbbfdZk6ePJmMUq0yYsQIM3369Obvo9GoKSgoaPOC3R/+8IdxbcXFxVywexGJjrMxxjz//PPG7/ebqqqqZJTYKSQyzv/9738v+Ds8duxY8/3vf99s377d1NfXJ7N0ayT6uzx79mxz1VVXmWg02txWXl5u8vPzHa/1UnSZ8GJM4y283/rWt8ymTZvMxx9/bK6++uq4W3gPHjxorr32WrNp06ZWX0PcbXRRiY5zOBw2RUVF5sYbbzR79uwxR44cad7OnDmTqh8jraxYscL4fD7z5ptvmp07d5qHHnrIZGVlmVAoZIwx5v777zezZs1q7v/JJ5+Ybt26mUWLFpldu3aZuXPncqt0OyQ6zgsWLDBer9esXLky7ve2rq4uVT+CFRId5/Nxt9HFJTrGBw4cMD169DDTp083u3fvNh988IHJyckxzz33XKp+hDZ1qfDy73//29x7773myiuvNH6/30yZMiXuj8y+ffuMJLN+/fpWX4PwcnGJjvP69euNpBa3ffv2peaHSEMvv/yy6du3r/F6vWbEiBHm008/bd532223mcmTJ8f1f/fdd80111xjvF6vuf76682f//znJFdsp0TG+aqrrmrx93bu3LnJL9wyif4+n4vw0j6JjvHf//53U1RUZHw+n+nfv7/57W9/m7b/gXQZY0yyT1UBAABcKuvvNgIAAF0L4QUAAFiF8AIAAKxCeAEAAFYhvAAAAKsQXgAAgFUILwAAwCqEFwAAYBXCCwAAsArhBQAAWIXwAgAArEJ4AQAAVvn/EvDybQr8KAkAAAAASUVORK5CYII=\n"
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "p = net.predict(weight)\n",
        "\n",
        "class_data = {\n",
        "    i:[]\n",
        "    for i in range(3)\n",
        "}\n",
        "\n",
        "for text,cls in zip(corpus, p):\n",
        "    class_data[cls.item()].append(text)\n",
        "\n",
        "class_data"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "0f2KeDiuEyQA",
        "outputId": "aaca1b10-669c-459c-eb6c-5d0245b16194"
      },
      "execution_count": 51,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{0: ['A man is riding a horse.',\n",
              "  'A man is riding a white horse on an enclosed ground.'],\n",
              " 1: ['花呗更改绑定银行卡',\n",
              "  '我什么时候开通了花呗',\n",
              "  'The girl is carrying a baby.',\n",
              "  'A woman is playing violin.',\n",
              "  'Two men pushed carts through the woods.'],\n",
              " 2: ['A man is eating food.', 'A man is eating a piece of bread.']}"
            ]
          },
          "metadata": {},
          "execution_count": 51
        }
      ]
    }
  ]
}